package org.rency.crawler.common;

import java.util.regex.Pattern;
import org.rency.common.utils.tool.InetUtils;

/**
 * Constants shared by the crawler code: default values, length limits,
 * thread names, regular expressions and common separator strings.
 *
 * <p>Fields declared in an interface are implicitly {@code public static final},
 * so the modifiers are not repeated on each declaration.
 */
public interface CrawlerConstants {

  /** Name of the default character set. */
  String DEFAULT_CHARSET = "UTF-8";

  /** Length of an extension column. */
  int EXTENSION_COLUMN_LENGTH = 200;

  // ---------------------------------------------------------
  // Defaults
  // ---------------------------------------------------------

  /** Maximum length of a fetch-address number. */
  int LENGTH_FETCH_NO = 200;

  /** Maximum length of a fetch address (target URI). */
  int LENGTH_FETCH_TARGET_URI = 9999;

  /**
   * Default HTTP connect timeout, kept as a string.
   * NOTE(review): the name suggests the unit is milliseconds — confirm against the consumer.
   */
  String DEFAULT_HTTP_CONNECT_TIMEOUT_MILL = "5000";

  /** Name of the HTTP {@code Content-Type} header. */
  String HTTP_HEADER_CONTENT_TYPE = "Content-Type";

  /** Name of the HTTP {@code User-Agent} header. */
  String USER_AGENT = "User-Agent";

  /** Default value sent as the HTTP user agent. */
  String DEFAULT_HTTP_AGENT = "Mozilla/5.0 (Windows NT 5.1; rv:5.0) Gecko/20100101 Firefox/5.0";

  /**
   * Step by which a page weight grows.
   * Built with {@link Double#valueOf(String)} instead of the deprecated {@code Double} constructor.
   */
  Double DEFAULT_PAGE_WEIGHT_STEP = Double.valueOf("0.0000001");

  /**
   * Default page weight.
   * Built with {@link Double#valueOf(double)} instead of the deprecated {@code Double} constructor.
   */
  Double DEFAULT_PAGE_WEIGHT = Double.valueOf(0.0d);

  /**
   * IP address of the local machine, as returned by {@code InetUtils.getLocalAddr()}.
   * Evaluated once, when this interface is initialized.
   */
  String LOCATION_MACHINE_IP = InetUtils.getLocalAddr();

  // ---------------------------------------------------------
  // Threads / thread pools
  // ---------------------------------------------------------

  /** Name of the daemon thread that looks up tasks taken off the queue. */
  String THREAD_NAME_LOOKUP_QUEUE = "Crawler.Lookup.Queue.Daemon";

  // ---------------------------------------------------------
  // Regular expressions
  // ---------------------------------------------------------

  /**
   * Matches a request protocol prefix such as {@code http://} or {@code ftp://}.
   * NOTE(review): the scheme group is optional, so a bare {@code ://} also matches —
   * confirm this is intended before tightening the expression.
   */
  Pattern PATTERN_PROTOCOL_PREFIX = Pattern.compile("((https|http|ftp|rtsp|mms|file)?://)");

  /** Default cache lifetime in seconds (one day). */
  int DEFAULT_CACHE_EXPIRE_TIME_SEC = 86400;

  /** Colon separator. */
  String COLON = ":";

  /** Dot separator. */
  String DOT = ".";

  /** Underscore separator. */
  String LINE = "_";

  /** Equals sign. */
  String EQUALS = "=";

  /** Name of the queue holding addresses waiting to be fetched. */
  String QUEUE_FETCH_NAME = "org.rency.crawler.fetch.queue";

  /** URI path separator. */
  String URI_PATH_SEPARATOR = "/";

}